import lxml.etree as le

# Scrape the two-level category listing out of a locally saved page:
# each matching wrapper <div> contributes one top-level heading (<h3>)
# and any number of sub-headings (<h4>).

# Read the saved page; the handle is only needed for the read itself,
# so everything else happens after the file has been closed.
with open('edu.html', 'r', encoding='utf-8') as f:
    html = f.read()

# Parse the markup into an element tree that can be queried with XPath.
html_x = le.HTML(html)

# contains() is a substring match on the raw class attribute, so a wrapper
# whose class attribute has extra classes before or after this exact
# "rank_list bd_rank_wrap" sequence is still selected.
div_x_s = html_x.xpath('//div[contains(@class,"rank_list bd_rank_wrap")]')

data_s = []
for div_x in div_x_s:
    # Text nodes of <h3> elements that are direct children of the wrapper.
    category1 = div_x.xpath('./h3/text()')
    print(category1)
    # Text nodes of every <h4> anywhere below the wrapper.
    category2_s = div_x.xpath('.//h4/text()')
    data_s.append(
        dict(
            category1=category1,
            category2_s=category2_s,
        )
    )
print(data_s)

# Print the collected data as an indented outline.
for data in data_s:
    headings = data.get('category1')
    # xpath() returns an empty list when the wrapper has no direct <h3>
    # text node; indexing it unconditionally would raise IndexError and
    # abort the whole report, so fall back to an empty heading line.
    print(headings[0] if headings else '')
    for category2 in data.get('category2_s'):
        print('   ', category2)